This document aims at exploring the dataset of 4 individuals in 2018. For that purpose, we need first to load the weanlingNES package to load data.
# load library
library(weanlingNES)
# load data
data("data_nes", package = "weanlingNES")
# load("../data/data_nes.rda")
Let’s have a look at what’s inside data_nes$data_2018:
# list structure
str(data_nes$year_2018, max.level = 1, give.attr = F, no.list = T)
## $ ind_2018070:Classes 'data.table' and 'data.frame': 22393 obs. of 47 variables:
## $ ind_2018072:Classes 'data.table' and 'data.frame': 29921 obs. of 47 variables:
## $ ind_2018074:Classes 'data.table' and 'data.frame': 38608 obs. of 47 variables:
## $ ind_2018080:Classes 'data.table' and 'data.frame': 19028 obs. of 47 variables:
A list of 4 data.frames, one for each seal
For convenience, we aggregate all 4 individuals into one dataset.
# combine all individuals
data_2018 <- rbindlist(data_nes$year_2018)
# display
DT::datatable(data_2018[sample.int(.N, 10), ], options = list(scrollX = T))
Table 1: Sample of 10 random rows from data_2018
Summary
# raw_data
data_2018[, .(
nb_days_recorded = uniqueN(as.Date(date)),
nb_dives = .N,
maxdepth_mean = mean(maxdepth),
dduration_mean = mean(dduration),
botttime_mean = mean(botttime),
pdi_mean = mean(pdi, na.rm = T)
), by = .id] %>%
sable(
caption = "Summary diving information relative to each 2018 individual",
digits = 2
)
Table 2: Summary diving information relative to each 2018 individual
|
.id
|
nb_days_recorded
|
nb_dives
|
maxdepth_mean
|
dduration_mean
|
botttime_mean
|
pdi_mean
|
|
ind_2018070
|
232
|
22393
|
305.52
|
783.27
|
243.22
|
109.55
|
|
ind_2018072
|
341
|
29921
|
357.86
|
876.96
|
278.02
|
104.90
|
|
ind_2018074
|
372
|
38608
|
250.67
|
686.25
|
291.89
|
302.77
|
|
ind_2018080
|
215
|
19028
|
296.50
|
867.69
|
339.90
|
103.51
|
Very nice dataset :)
Some explanatory plots
Missing values
# build dataset to check for missing values
dataPlot <- melt(data_2018[, .(.id, is.na(.SD)), .SDcol = -c(
".id",
"divenumber",
"year",
"month",
"day",
"hour",
"min",
"sec",
"juldate",
"divetype",
"date",
"phase",
"lat",
"lon"
)])
# add the id of rows
dataPlot[, id_row := c(1:.N), by = c("variable", ".id")]
# plot
ggplot(dataPlot, aes(x = variable, y = id_row, fill = value)) +
geom_tile() +
labs(x = "Attributes", y = "Rows") +
scale_fill_manual(
values = c("white", "black"),
labels = c("Real", "Missing")
) +
facet_wrap(.id ~ ., scales = "free_y") +
theme_jjo() +
theme(
legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1),
legend.key = element_rect(colour = "black")
)
So far so good, only few variables seems to have missing values:
# table with percent
table_inter <- data_2018[, lapply(.SD, function(x) {
round(length(x[is.na(x)]) * 100 / length(x), 1)
}), .SDcol = -c(
".id",
"divenumber",
"year",
"month",
"day",
"hour",
"min",
"sec",
"juldate",
"divetype",
"date",
"phase",
"lat",
"lon"
)]
# find which are different from 0
cond_inter <- sapply(table_inter, function(x) {
x == 0
})
# display the percentages that are over 0
table_inter[, which(cond_inter) := NULL] %>%
sable(caption = "Percentage of missing values per columns having missing values!") %>%
scroll_box(width = "100%")
Table 3: Percentage of missing values per columns having missing values!
|
lightatsurf
|
lattenuation
|
euphoticdepth
|
thermoclinedepth
|
driftrate
|
benthicdivevertrate
|
cornerindex
|
foragingindex
|
verticalspeed90perc
|
verticalspeed95perc
|
dist_dep
|
|
26.3
|
89
|
62.6
|
1.3
|
0.5
|
22.7
|
75.8
|
0.5
|
0.1
|
0.1
|
35.1
|
Outliers
Ok, let’s have a look at all the data. But first, we have to remove outliers. Some of them are quiet easy to spot looking at the distribution of dive duration:
Before
ggplot(
data_2018[, .SD][, state := "Before"],
aes(x = dduration, fill = .id)
) +
geom_histogram(show.legend = FALSE) +
geom_vline(xintercept = 3000, linetype = "longdash") +
facet_grid(state ~ .id,
scales = "free"
) +
labs(y = "# of dives", x = "Dive duration (s)") +
theme_jjo()
After
ggplot(
data_2018[dduration < 3000, ][][, state := "After"],
aes(x = dduration, fill = .id)
) +
geom_histogram(show.legend = FALSE) +
geom_vline(xintercept = 3000, linetype = "longdash") +
facet_grid(state ~ .id,
scales = "free"
) +
labs(x = "# of dives", y = "Dive duration (s)") +
theme_jjo()
It seems much better, so let’s remove any rows with dduration > 3000 sec.
# filter data
data_2018_filter <- data_2018[dduration < 3000, ]
# nbrow removed
data_2018[dduration >= 3000, .(nb_row_removed = .N), by = .id] %>%
sable(caption = "# of rows removed by 2018-individuals")
Table 4: # of rows removed by 2018-individuals
|
.id
|
nb_row_removed
|
|
ind_2018070
|
3
|
|
ind_2018072
|
1
|
|
ind_2018074
|
33
|
Check day and night
Light levels
# let's first average `lightatsurf` by individuals, day since departure and hour
dataPlot <- data_2018[, .(lightatsurf = median(lightatsurf)),
by = .(.id, day_departure, date = as.Date(date), hour)
]
# display the result
ggplot(dataPlot, aes(x = day_departure, y = hour, fill = lightatsurf)) +
geom_tile() +
facet_grid(.id ~ .) +
theme_jjo() +
labs(x = "# of days since departure",
y = "Hour",
fill = "Light level at the surface")+
theme(legend.position = c("bottom"))
Day and night detection
# let's first average `lightatsurf` by individuals, day since departure and hour
dataPlot <- data_2018[, .(lightatsurf = median(lightatsurf)),
by = .(.id,
day_departure,
date = as.Date(date),
hour,
phase)
]
# display the result
ggplot(dataPlot, aes(x = day_departure, y = hour, fill = phase)) +
geom_tile() +
facet_grid(.id ~ .) +
theme_jjo() +
labs(x = "# of days since departure",
y = "Hour",
fill = "Day time and night time as detected by the `cal_phase_day` function") +
theme(legend.position = c("bottom"))
All Variables
names_display <- names(data_2018_filter[, -c(
".id",
"date",
"divenumber",
"year",
"month",
"day",
"hour",
"min",
"sec",
"juldate",
"divetype",
"euphoticdepth",
"thermoclinedepth",
"day_departure",
"phase",
"lat",
"lon",
"dist_dep"
)])
for (i in names_display) {
cat("#####", i, "{.unlisted .unnumbered} \n")
if (i == "maxdepth") {
print(
ggplot() +
geom_point(
data = data_2018_filter[, .(
.id,
date,
thermoclinedepth
)],
aes(
x = as.Date(date),
y = -thermoclinedepth,
colour = "Thermocline (m)"
),
alpha = .2,
size = .5
) +
geom_point(
data = data_2018_filter[, .(
.id,
date,
euphoticdepth
)],
aes(
x = as.Date(date),
y = -euphoticdepth,
colour = "Euphotic (m)"
),
alpha = .2,
size = .5
) +
scale_colour_manual(
values = c(
"Thermocline (m)" = "red",
"Euphotic (m)" = "black"
),
name = "Zone"
) +
new_scale_color() +
geom_point(
data = melt(data_2018_filter[, .(.id, date, get(i))],
id.vars = c(".id", "date")),
aes(
x = as.Date(date),
y = -value,
col = .id
),
alpha = 1 / 10,
size = .5,
show.legend = FALSE
) +
facet_wrap(. ~ .id, scales = "free") +
scale_x_date(date_labels = "%m/%Y") +
labs(x = "Date", y = "Maximum Depth (m)") +
theme_jjo() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
)
cat("<blockquote> Considering `ind_2018074` has slightly different values than other individuals for the thermocline depth, it would be interesting to see where the animal went. </blockquote>")
} else if (i == "driftrate") {
print(
ggplot(
data = melt(data_2018_filter[, .(.id, date, get(i), divetype)],
id.vars = c(".id", "date", "divetype")),
aes(
x = as.Date(date),
y = value,
col = divetype
)
) +
geom_point(
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
scale_x_date(date_labels = "%m/%Y") +
labs(x = "Date", y = "Drift Rate 'm/s", col = "Dive Type") +
theme_jjo() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
) +
guides(colour = guide_legend(override.aes = list(
size = 7,
alpha = 1
)))
)
} else {
print(
ggplot(
data = melt(data_2018_filter[, .(.id, date, get(i))],
id.vars = c(".id", "date")),
aes(
x = as.Date(date),
y = value,
col = .id
)
) +
geom_point(
show.legend = FALSE,
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
scale_x_date(date_labels = "%m/%Y") +
labs(x = "Date", y = i) +
theme_jjo() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
)
}
cat("\n \n")
}
maxdepth
Considering ind_2018074 has slightly different values than other individuals for the thermocline depth, it would be interesting to see where the animal went.
dduration

botttime

desctime

descrate

asctime

ascrate

pdi

dwigglesdesc

dwigglesbott

dwigglesasc

totvertdistbot

bottrange

efficiency

idz

lightatbott

lwiggles

lightatsurf

lattenuation

tempatsurf

tempatbott

driftdiveindex

driftrate

benthicdiveindex

benthicdivevertrate

cornerindex

foragingindex

verticalspeed90perc

verticalspeed95perc

Few questions, that I should look into it:
- is the bimodal distribution of
dduration, desctime due to nycthemeral migration?
- is the bimodal distribution of
descrate (especially for ind2018070 and ind_2018072) due to drift dive?
- is
lightatbott could be used to identify bioluminescence, cause it seems there is a lot going on at the bottom?
- are the variations observed for
lightatsurf is due to moon cycle?
- not sure why is there a bimodal distribution of
tempatbott!
drifrate that one is awesome! Thanks to divetype we can clearly see a pattern of how driftrate (and so buoyancy) change according time.
- the bimodal distribution of
verticalspeed90 and verticalspeed95 should be due to drift dive.
# same plot with a colored for the phase of the day
for (i in names_display) {
cat("####", i, "{-} \n")
print(
ggplot(
data = melt(data_2018_filter[, .(.id, date, get(i), phase)],
id.vars = c(
".id",
"date",
"phase"
)
),
aes(
x = as.Date(date),
y = value,
col = phase
)
) +
geom_point(
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
scale_x_date(date_labels = "%m/%Y") +
labs(x = "Date", y = i) +
theme_jjo() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
) +
guides(colour = guide_legend(override.aes = list(
size = 7,
alpha = 1
)))
)
cat("\n \n")
}
maxdepth

dduration

botttime

desctime

descrate

asctime

ascrate

pdi

dwigglesdesc

dwigglesbott

dwigglesasc

totvertdistbot

bottrange

efficiency

idz

lightatbott

lwiggles

lightatsurf

lattenuation

tempatsurf

tempatbott

driftdiveindex

driftrate

benthicdiveindex

benthicdivevertrate

cornerindex

foragingindex

verticalspeed90perc

verticalspeed95perc

All Variables during the first month
for (i in names_display) {
cat("####", i, "{.unlisted .unnumbered} \n")
if (i == "maxdepth") {
print(
ggplot() +
geom_point(
data = data_2018_filter[day_departure < 32, .(
.id,
day_departure,
thermoclinedepth
)],
aes(
x = day_departure,
y = -thermoclinedepth,
colour = "Thermocline (m)",
group = day_departure
),
alpha = .2,
size = .5
) +
geom_point(
data = data_2018_filter[day_departure < 32, .(
.id,
day_departure,
euphoticdepth
)],
aes(
x = day_departure,
y = -euphoticdepth,
colour = "Euphotic (m)",
group = day_departure
),
alpha = .2,
size = .5
) +
scale_colour_manual(
values = c(
"Thermocline (m)" = "red",
"Euphotic (m)" = "black"
),
name = "Zone"
) +
new_scale_color() +
geom_boxplot(
data = melt(data_2018_filter[day_departure < 32,
.(.id, day_departure, get(i))],
id.vars = c(".id", "day_departure")),
aes(
x = day_departure,
y = -value,
col = .id,
group = day_departure
),
alpha = 1 / 10,
size = .5,
show.legend = FALSE
) +
facet_wrap(. ~ .id, scales = "free") +
labs(x = "# days since departure", y = "Maximum Depth (m)") +
theme_jjo() +
theme(legend.position = "bottom")
)
} else {
print(
ggplot(
data = melt(data_2018_filter[day_departure < 32,
.(.id, day_departure, get(i))],
id.vars = c(".id", "day_departure")),
aes(
x = day_departure,
y = value,
color = .id,
group = day_departure
)
) +
geom_boxplot(
show.legend = FALSE,
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
labs(x = "# days since departure", y = i) +
theme_jjo()
)
}
cat("\n \n")
}
maxdepth

dduration

botttime

desctime

descrate

asctime

ascrate

pdi

dwigglesdesc

dwigglesbott

dwigglesasc

totvertdistbot

bottrange

efficiency

idz

lightatbott

lwiggles

lightatsurf

lattenuation

tempatsurf

tempatbott

driftdiveindex

driftrate

benthicdiveindex

benthicdivevertrate

cornerindex

foragingindex

verticalspeed90perc

verticalspeed95perc

for (i in names_display) {
cat("####", i, "{.unlisted .unnumbered} \n")
print(
ggplot(
data = melt(data_2018_filter[
day_departure < 32,
.(.id, day_departure, get(i), phase)
],
id.vars = c(".id", "day_departure", "phase")
),
aes(
x = day_departure,
y = value,
color = phase,
group = interaction(day_departure, phase),
)
) +
geom_boxplot(
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
labs(x = "# days since departure", y = i) +
theme_jjo() +
theme(legend.position = "bottom")
)
cat("\n \n")
}
maxdepth

dduration

botttime

desctime

descrate

asctime

ascrate

pdi

dwigglesdesc

dwigglesbott

dwigglesasc

totvertdistbot

bottrange

efficiency

idz

lightatbott

lwiggles

lightatsurf

lattenuation

tempatsurf

tempatbott

driftdiveindex

driftrate

benthicdiveindex

benthicdivevertrate

cornerindex

foragingindex

verticalspeed90perc

verticalspeed95perc

Correlation
Can we find nice correlation?
# compute correlation
corr_2018 <- round(cor(data_2018_filter[, names_display, with = F],
use = "pairwise.complete.obs"
), 1)
# replace NA value by 0
corr_2018[is.na(corr_2018)] <- 0
# compute p_values
corr_p_2018 <- cor_pmat(data_2018_filter[, names_display, with = F])
# replace NA value by 0
corr_p_2018[is.na(corr_p_2018)] <- 1
# display
ggcorrplot(
corr_2018,
p.mat = corr_p_2018,
hc.order = TRUE,
method = "circle",
type = "lower",
ggtheme = theme_jjo(),
sig.level = 0.05,
colors = c("#00AFBB", "#E7B800", "#FC4E07")
)
Another way to see it:
# flatten correlation matrix
cor_result_2018 <- flat_cor_mat(corr_2018, corr_p_2018)
# keep only the one above .7
cor_result_2018[cor >= .7, ][order(-abs(cor))] %>%
sable(caption = "Pairwise correlation above 0.75 and associated p-values")
Table 5: Pairwise correlation above 0.75 and associated p-values
|
row
|
column
|
cor
|
p
|
|
verticalspeed90perc
|
verticalspeed95perc
|
1.0
|
0
|
|
maxdepth
|
asctime
|
0.8
|
0
|
|
botttime
|
efficiency
|
0.8
|
0
|
|
dwigglesbott
|
foragingindex
|
0.8
|
0
|
|
maxdepth
|
dduration
|
0.7
|
0
|
|
maxdepth
|
desctime
|
0.7
|
0
|
|
dduration
|
desctime
|
0.7
|
0
|
|
dduration
|
asctime
|
0.7
|
0
|
|
totvertdistbot
|
bottrange
|
0.7
|
0
|
|
totvertdistbot
|
verticalspeed90perc
|
0.7
|
0
|
|
totvertdistbot
|
verticalspeed95perc
|
0.7
|
0
|
I guess nothing unexpected here, I’ll have to check with Patrick about the efficiency ;)
Dive Type
# dataset to plot proportional area plot
data_2018_filter[, sum_id := .N, by = .(.id, day_departure)] %>%
.[, sum_id_days := .N, by = .(.id, day_departure, divetype)] %>%
.[, prop := sum_id_days / sum_id]
dataPlot <- unique(data_2018_filter[, .(prop, .id, divetype, day_departure)])
# area plot
ggplot(dataPlot, aes(
x = as.numeric(day_departure),
y = prop,
fill = as.character(divetype)
)) +
geom_area(alpha = 0.6, size = 1) +
facet_wrap(.id ~ ., scales = "free") +
theme_jjo() +
theme(legend.position = "bottom") +
labs(x = "# of days since departure",
y = "Proportion of dives",
fill = "Dive types")
Dive duration vs. Maximum depth
Colored by ID
# plot
ggplot(data = data_2018_filter, aes(y = dduration, x = maxdepth, col = .id)) +
geom_point(size = .5, alpha = .1, show.legend = FALSE) +
facet_wrap(.id ~ .) +
labs(x = "Maximum depth (m)", y = "Dive duration (s)") +
theme_jjo()
Colored by Dive Type
# plot
ggplot(data = data_2018_filter, aes(y = dduration,
x = maxdepth,
col = divetype)) +
geom_point(size = .5, alpha = .1) +
facet_wrap(.id ~ .) +
guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
labs(x = "Maximum depth (m)", y = "Dive duration (s)") +
theme_jjo() +
theme(legend.position = "bottom")
Colored by # days since departure
# plot
ggplot(data = data_2018_filter[, prop_track := (day_departure * 100) / max(day_departure), by = .id],
aes(y = dduration, x = maxdepth, col = prop_track)) +
geom_point(size = .5, alpha = .1) +
facet_wrap(.id ~ .) +
labs(x = "Maximum depth (m)",
y = "Dive duration (s)",
col = "Proportion of completed track (%)") +
scale_color_continuous(type = "viridis") +
theme_jjo() +
theme(legend.position = "bottom")
Colored by phases of day
# plot
ggplot(data = data_2018_filter, aes(y = dduration, x = maxdepth, col = phase)) +
geom_point(size = .5, alpha = .1) +
facet_wrap(.id ~ .) +
guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
labs(x = "Maximum depth (m)",
y = "Dive duration (s)",
col = "Phases of the day") +
theme_jjo() +
theme(legend.position = "bottom")
There seems to be a patch for high depths (especially visible for ind2018070), but I don’t know what it could be linked to…
Drift Rate
In the following graphs:
driftrate is calculated using only divetype == "2: drift"
- whereas all the others variables are calculated all dives considered
# build dataset
dataPlot <- data_2018_filter[divetype == "2: drift",
# median drift rate for drift dive
.(driftrate = median(driftrate, na.rm = T)),
by = .(.id, day_departure)
][data_2018_filter[,
.(
# median dive duration all dives considered
dduration = median(dduration, na.rm = T),
# median max depth all dives considered
maxdepth = median(maxdepth, na.rm = T),
# median bottom dives all dives considered
botttime = median(botttime, na.rm = T)
),
by = .(.id, day_departure)
],
on = c(".id", "day_departure")
]
# plot
ggplot(dataPlot, aes(x = botttime, y = driftrate, col = .id)) +
geom_point(size = .5, alpha = .5) +
geom_smooth(method = "lm") +
guides(color = "none") +
facet_wrap(.id ~ .) +
scale_x_continuous(limits = c(0, 700)) +
labs(x = "Daily median Bottom time (s)",
y = "Daily median drift rate (m.s-1)") +
theme_jjo()
# plot
ggplot(dataPlot, aes(x = maxdepth, y = driftrate, col = .id)) +
geom_point(size = .5, alpha = .5) +
geom_smooth(method = "lm") +
guides(color = "none") +
facet_wrap(.id ~ .) +
labs(x = "Daily median Maximum depth (m)",
y = "Daily median drift rate (m.s-1)") +
theme_jjo()
# plot
ggplot(dataPlot, aes(x = dduration, y = driftrate, col = .id)) +
geom_point(size = .5, alpha = .5) +
geom_smooth(method = "lm") +
guides(color = "none") +
facet_wrap(.id ~ .) +
labs(x = "Daily median Dive duration (s)",
y = "Daily median drift rate (m.s-1)") +
theme_jjo()
Behavioral Aerobic Dive Limit (bADL)
# dive duration vs pdi by days
ggplot(data = data_2018_filter[pdi < 300, ], aes(
x = dduration,
y = pdi,
color = .id,
group = dduration,
fill = "none"
)) +
geom_boxplot(show.legend = FALSE, outlier.alpha = 0.05, alpha = 0) +
labs(x = "Dive duration (s)", y = "Post-dive duration (s)") +
facet_wrap(. ~ .id, scales = "free_x") +
theme_jjo()
# dive duration vs pdi by days
ggplot(data = data_2018_filter[pdi < 300,], aes(x = dduration,
y = pdi,
color = .id)) +
geom_point(show.legend = FALSE, alpha = 0.05) +
geom_smooth(
method = "gam",
show.legend = FALSE,
col = "black",
linetype = "dashed"
) +
labs(x = "Dive duration (s)", y = "Post-dive duration (s)") +
facet_wrap(. ~ .id, scales = "free_x") +
theme_jjo()
# dive duration vs pdi by days
ggplot(
data = data_2018_filter[pdi < 300, .(.id, pdi_ratio = pdi / dduration, day_departure)],
aes(
x = day_departure,
y = pdi_ratio,
color = .id,
group = day_departure,
fill = "none"
)
) +
geom_boxplot(show.legend = FALSE,
outlier.alpha = 0.05,
alpha = 0) +
labs(x = "# days since departure", y = "Post-dive / Dive duration ratio") +
facet_wrap(. ~ .id, scales = "free_x") +
# zoom
coord_cartesian(ylim = c(0, 0.4)) +
theme_jjo()
Based on Shero et al. (2018), we decided to look at the bADL as the 95th percentile of dive duration each day, for those with \(n \geq 50\). This threshold was chosen following this figure:
ggplot(data_2018_filter[,.(nb_dives = .N),
by = .(.id, day_departure)],
aes(x=nb_dives, fill=.id)) +
geom_histogram(show.legend = FALSE) +
facet_grid(.~.id) +
labs(y="# of days", x = "# of dives per day") +
theme_jjo()
# select day that have at least 50 dives
days_to_keep = data_2018_filter[,
.(nb_dives = .N),
by = .(.id, day_departure)] %>%
.[nb_dives >= 50,]
# keep only those days
data_2018_filter_complete_day = merge(data_2018_filter,
days_to_keep,
by = c(".id", "day_departure"))
# data plot
dataPlot = data_2018_filter_complete_day[,
.(badl = quantile(dduration, 0.95)),
by = .(.id, day_departure)]
# combine two datasets to be able to use a second axis
# https://stackoverflow.com/questions/49185583/two-y-axes-with-different-scales-for-two-datasets-in-ggplot2
dataMegaPlot = rbind(data_2018_filter_complete_day[divetype == "2: drift"] %>%
.[, .(w = .id,
y = driftrate,
x = day_departure,
z = "second_plot")],
dataPlot[, .(
w = .id,
# tricky one
y = (badl / 1000) - 1,
x = day_departure,
z = "first_plot"
)])
# plot
ggplot() +
geom_point(
data = dataMegaPlot[z == "second_plot", ],
aes(x = x, y = y),
alpha = 1 / 10,
size = 0.5,
color = "grey40",
show.legend = FALSE
) +
geom_path(data = dataMegaPlot[z == "first_plot", ],
aes(x = x, y = y, color = w),
show.legend = FALSE) +
scale_y_continuous(
# Features of the first axis
name = "Drift rate (m/s)",
# Add a second axis and specify its features
sec.axis = sec_axis( ~ (. * 1000) + 1000,
name = "Behavioral Aerobic Dive Limit (s)")
) +
labs(x = "# days since departure") +
facet_wrap(w ~ .) +
theme_jjo()
Looking at this graph, I want to believe that there is some kind of relationship between the bADL as defined by Shero et al. (2018) and the drift rate (and so buyoancy).
# get badl
dataplot_1 = data_2018_filter_complete_day[,
.(badl = quantile(dduration, 0.95)),
by = .(.id, day_departure)]
# get driftrate
dataplot_2 = data_2018_filter_complete_day[divetype == "2: drift",
.(driftrate = median(driftrate)),
by = .(.id, day_departure)]
# merge
dataPlot = merge(dataplot_1,
dataplot_2,
by = c(".id", "day_departure"),
all = TRUE)
# plot
ggplot(data = dataPlot, aes(x = badl, y = driftrate, col = .id)) +
geom_point(show.legend = FALSE) +
facet_wrap(.id~., scales = "free") +
theme_jjo()

ind_2018070
# ind_2018070
plot_ly(
x = dataPlot[.id == "ind_2018070", badl],
y = dataPlot[.id == "ind_2018070", day_departure],
z = dataPlot[.id == "ind_2018070", driftrate],
type = "scatter3d",
mode = "markers",
marker = list(size = 2),
color = dataPlot[.id == "ind_2018070", day_departure]
) %>%
layout(scene = list(xaxis = list(title = 'Behavioral ADL'),
yaxis = list(title = '# days since departure'),
zaxis = list(title = 'Drift rate (m/s)')))
ind_2018072
# ind_2018072
plot_ly(
x = dataPlot[.id == "ind_2018072", badl],
y = dataPlot[.id == "ind_2018072", day_departure],
z = dataPlot[.id == "ind_2018072", driftrate],
type = "scatter3d",
mode = "markers",
marker = list(size = 2),
color = dataPlot[.id == "ind_2018072", day_departure]
) %>%
layout(scene = list(xaxis = list(title = 'Behavioral ADL'),
yaxis = list(title = '# days since departure'),
zaxis = list(title = 'Drift rate (m/s)')))
ind_2018074
# ind_2018074
plot_ly(
x = dataPlot[.id == "ind_2018074", badl],
y = dataPlot[.id == "ind_2018074", day_departure],
z = dataPlot[.id == "ind_2018074", driftrate],
type = "scatter3d",
mode = "markers",
marker = list(size = 2),
color = dataPlot[.id == "ind_2018074", day_departure]
) %>%
layout(scene = list(xaxis = list(title = 'Behavioral ADL'),
yaxis = list(title = '# days since departure'),
zaxis = list(title = 'Drift rate (m/s)')))
ind_2018072
# ind_2018080
plot_ly(
x = dataPlot[.id == "ind_2018080", badl],
y = dataPlot[.id == "ind_2018080", day_departure],
z = dataPlot[.id == "ind_2018080", driftrate],
type = "scatter3d",
mode = "markers",
marker = list(size = 2),
color = dataPlot[.id == "ind_2018080", day_departure]
) %>%
layout(scene = list(xaxis = list(title = 'Behavioral ADL'),
yaxis = list(title = '# days since departure'),
zaxis = list(title = 'Drift rate (m/s)')))
GPS data
# This piece of code is only there to show how to draw a polylines with a
# gradient color using leaflet.We're not using it due to the size of the
# created map, and will continue using circle marker
# datasets used to display map
df_driftrate = unique(data_2018_filter[.id == "ind_2018070" &
divetype == "2: drift",
.(.id, lat, lon, dduration)])
# color palette
pal <- colorNumeric(
palette = "YlGnBu",
domain = df_driftrate$dduration
)
# add
df_driftrate[, `:=`(nextLat = shift(lat),
nextLon = shift(lon),
color = pal(df_driftrate$dduration))]
# interactive map
gradient_map <- leaflet() %>%
setView(lng = -122, lat = 38, zoom = 2) %>%
addTiles()
# add lines
for (i in 1:nrow(df_driftrate)) {
gradient_map <- addPolylines(
map = gradient_map,
data = df_driftrate,
lat = as.numeric(df_driftrate[i, c('lat', 'nextLat')]),
lng = as.numeric(df_driftrate[i, c('lon', 'nextLon')]),
color = df_driftrate[i, color],
weight = 3,
group = "individual_1"
)
}
# add layer control
gradient_map <- addLayersControl(
map = gradient_map,
overlayGroups = c("individual_1"),
options = layersControlOptions(collapsed = FALSE)
)
# format(object.size(gradient_map), units = "Mb")
Because for some data the contrast in changes was not enough marked, the only treatment applied on these data is to remove outliers for each variable using the interquartile range rule.
# interactive map
gradient_map <- leaflet() %>%
setView(lng = -132, lat = 48, zoom = 4) %>%
addTiles()
# loop by individuals and variable
grps = NULL
for (i in seq(data_2018_filter[!is.na(lat),unique(.id)])){
for (k in c("dduration","maxdepth","efficiency", "driftrate")){
if (k == "driftrate"){
# set dataset used to plot
dataPlot = unique(data_2018_filter %>%
.[order(date),] %>%
.[.id==data_2018_filter[!is.na(lat), unique(.id)][i] &
divetype == "2: drift" &
!is.na(get(k)),
c("lat", "lon", k), with=FALSE] %>%
.[!is_outlier(get(k)),])
# color palette creation
colPal <- colorNumeric(
palette = "BrBG",
domain = seq(-dataPlot[,max(abs(driftrate))],
dataPlot[,max(abs(driftrate))],
0.1)
)
} else {
# set dataset used to plot
dataPlot = unique(data_2018_filter %>%
.[order(date),] %>%
.[.id==data_2018_filter[!is.na(lat), unique(.id)][i] &
divetype != "2: drift" &
!is.na(get(k)),
c("lat", "lon", k), with=FALSE] %>%
.[!is_outlier(get(k)),])
# color palette creation
colPal <- colorNumeric(
palette = "YlGnBu",
domain = dataPlot[,get(k)]
)
}
# add color to dataset
dataPlot[, color := colPal(dataPlot[,get(k)])]
# add size column
dataPlot[, radius := 3]
# mark the beginning of the trip
dataPlot[1, `:=` (color = "green",
radius = 4)]
# mark the end of the trip
dataPlot[.N, `:=` (color = "red",
radius = 4)]
# reorder to make the end and the beginning in front
dataPlot = rbind(dataPlot[-1,],dataPlot[1,])
# add markers to map
gradient_map <- addCircleMarkers(
map = gradient_map,
data = dataPlot,
lat = ~lat,
lng = ~lon,
radius = ~radius,
stroke = FALSE,
color = ~color,
fillOpacity = 1,
group = paste(data_2018_filter[,unique(.id)][i], "-", k)
) %>%
addLegend("bottomleft",
data = dataPlot,
group = paste(data_2018_filter[,unique(.id)][i], "-", k),
pal = colPal,
values = ~get(k),
title = k,
opacity = 1
)
# retrieve groups
grps = c(grps, paste(data_2018_filter[,unique(.id)][i], "-", k))
}
}
# add layer control
gradient_map <- addLayersControl(
map = gradient_map,
overlayGroups = grps,
options = layersControlOptions(collapsed = TRUE)
) %>% hideGroup(grps)
# display
gradient_map
---
title: "Data Exploration - 2018"
author: "Joffrey JOUMAA"
date: "`r invisible(Sys.setlocale(locale = 'C')); format(Sys.Date(), format = '%B %d, %Y')`"
output:
  bookdown::html_document2:
    css: cosmo_custom.css
    number_sections: yes
    code_folding: show
    df_print: default
    fig_caption: yes
    code_download: yes
    toc: yes
    toc_float:
      collapsed: yes
      smooth_scroll: no
vignette: >
  %\VignetteIndexEntry{Data Exploration - 2018}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---
  
```{r setup, include=FALSE}
# command to build package without getting vignette error
# https://github.com/rstudio/renv/issues/833
# devtools::check(build_args=c("--no-build-vignettes"))

# reduce png size
knitr::knit_hooks$set(optipng = knitr::hook_optipng)
knitr::knit_hooks$set(pngquant = knitr::hook_pngquant)

# global option relative to rmarkdown
knitr::opts_chunk$set(
  echo = TRUE,
  fig.align = "center",
  out.width = "100%",
  message = FALSE,
  warning = FALSE,
  # tidy = TRUE,
  optipng = "-o7 -quiet",
  pngquant = "--speed=1"
)

# library
library(data.table)
library(ggplot2)
library(kableExtra)
library(leaflet)
library(gtsummary) # https://cran.r-project.org/web/packages/gtsummary/vignettes/tbl_summary.html
library(corrplot)
library(ggcorrplot)
library(ggnewscale)
library(magrittr)
library(DT)
library(plotly)
library(geosphere)

# remove some warnings
suppressWarnings(library(ggplot2))

# define my own table format: https://github.com/haozhu233/kableExtra/issues/374
sable <- function(x, escape = T, ...) {
  knitr::kable(x, escape = escape, ...) %>%
    kable_styling(
      bootstrap_options = c("striped", "hover", "responsive"),
      full_width = F
    )
}

# theme ggplot
# based: https://benjaminlouis-stat.fr/en/blog/2020-05-21-astuces-ggplot-rmarkdown/
theme_jjo <- function(base_size = 12) {
  theme_bw(base_size = base_size) %+replace%
    theme(
      # the whole figure
      # plot.title = element_text(size = rel(1), face = "bold", margin = margin(0,0,5,0), hjust = 0),
      # figure area
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      # axes
      # axis.title = element_text(size = rel(0.85), face = "bold"),
      # axis.text = element_text(size = rel(0.70), face = "bold"),
      axis.line = element_line(color = "black", arrow = arrow(length = unit(0.2, "lines"), type = "closed")),
      # legend
      # legend.title = element_text(size = rel(0.85), face = "bold"),
      # legend.text = element_text(size = rel(0.70), face = "bold"),
      # legend.key = element_rect(fill = "transparent", colour = NA),
      # legend.key.size = unit(1.5, "lines"),
      # legend.background = element_rect(fill = "transparent", colour = NA),
      # Les <U+00E9>tiquettes dans le cas d'un facetting
      strip.background = element_rect(fill = "#888888", color = "#888888"),
      strip.text = element_text(size = rel(0.85), face = "bold", color = "white", margin = margin(5, 0, 5, 0))
    )
}
```

## {.unnumbered}

This document aims at exploring the dataset of 4 individuals in 2018. For that purpose, we need first to load the `weanlingNES` package to load data.

```{r data-exploration-2018-1}
# load library
library(weanlingNES)

# load data
data("data_nes", package = "weanlingNES")
# load("../data/data_nes.rda")
```

Let’s have a look at what’s inside `data_nes$data_2018`:
  
```{r data-exploration-2018-2}
# list structure
str(data_nes$year_2018, max.level = 1, give.attr = F, no.list = T)
```

> A list of `r length(data_nes$year_2018)` `data.frames`, one for each seal

For convenience, we aggregate all `r length(data_nes$year_2018)` individuals into one dataset.

```{r data-exploration-2018-3, eval=FALSE}
# combine all individuals
data_2018 <- rbindlist(data_nes$year_2018)

# display
DT::datatable(data_2018[sample.int(.N, 10), ], options = list(scrollX = T))
```
```{r data-exploration-2018-4, echo=FALSE, results='asis'}
# combine all individuals
data_2018 <- rbindlist(data_nes$year_2018)

# title
cat("<table style='width: 50%'>", 
    paste0(
      "<caption>", 
      "(#tab:myDThtmltools)", 
      "Sample of 10 random rows from `data_2018`", 
      "</caption>"), 
    "</table>", 
    sep = "\n")

# display
DT::datatable(data_2018[sample.int(.N, 10), ], options = list(scrollX = T))
```

## Summary

```{r data-exploration-2018-5}
# raw_data
data_2018[, .(
  nb_days_recorded = uniqueN(as.Date(date)),
  nb_dives = .N,
  maxdepth_mean = mean(maxdepth),
  dduration_mean = mean(dduration),
  botttime_mean = mean(botttime),
  pdi_mean = mean(pdi, na.rm = T)
), by = .id] %>%
  sable(
    caption = "Summary diving information relative to each 2018 individual",
    digits = 2
  )
```
> Very nice dataset :)

## Some explanatory plots

### Missing values

```{r data-exploration-2018-6, fig.cap="Check for missing value in 2018-individuals", fig.width=9}
# build dataset to check for missing values
dataPlot <- melt(data_2018[, .(.id, is.na(.SD)), .SDcol = -c(
  ".id",
  "divenumber",
  "year",
  "month",
  "day",
  "hour",
  "min",
  "sec",
  "juldate",
  "divetype",
  "date",
  "phase",
  "lat",
  "lon"
)])
# add the id of rows
dataPlot[, id_row := c(1:.N), by = c("variable", ".id")]

# plot
ggplot(dataPlot, aes(x = variable, y = id_row, fill = value)) +
  geom_tile() +
  labs(x = "Attributes", y = "Rows") +
  scale_fill_manual(
    values = c("white", "black"),
    labels = c("Real", "Missing")
  ) +
  facet_wrap(.id ~ ., scales = "free_y") +
  theme_jjo() +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.key = element_rect(colour = "black")
  )
```

So far so good, only few variables seems to have missing values:
  
```{r data-exploration-2018-7}
# table with percent
table_inter <- data_2018[, lapply(.SD, function(x) {
  round(length(x[is.na(x)]) * 100 / length(x), 1)
}), .SDcol = -c(
  ".id",
  "divenumber",
  "year",
  "month",
  "day",
  "hour",
  "min",
  "sec",
  "juldate",
  "divetype",
  "date",
  "phase",
  "lat",
  "lon"
)]

# find which are different from 0
cond_inter <- sapply(table_inter, function(x) {
  x == 0
})

# display the percentages that are over 0
table_inter[, which(cond_inter) := NULL] %>%
  sable(caption = "Percentage of missing values per columns having missing values!") %>%
  scroll_box(width = "100%")
```

### Outliers {.tabset}

Ok, let's have a look at all the data. But first, we have to remove outliers. Some of them are quiet easy to spot looking at the distribution of dive duration:

#### Before {-}

```{r data-exploration-2018-8, fig.cap='Distribution of `dduration` for each seal. The dashed line highlight the "subjective" threshold used to remove outliers (3000 sec)', fig.height=3}
ggplot(
  data_2018[, .SD][, state := "Before"],
  aes(x = dduration, fill = .id)
) +
  geom_histogram(show.legend = FALSE) +
  geom_vline(xintercept = 3000, linetype = "longdash") +
  facet_grid(state ~ .id,
             scales = "free"
  ) +
  labs(y = "# of dives", x = "Dive duration (s)") +
  theme_jjo()
```

#### After {-}

```{r data-exploration-2018-9, fig.cap='Same distribution of `dduration` for each seal but after removing any `dduration` > 3000 sec. The dashed line highlight the "subjective" threshold used to remove outliers', fig.height=3}
ggplot(
  data_2018[dduration < 3000, ][][, state := "After"],
  aes(x = dduration, fill = .id)
) +
  geom_histogram(show.legend = FALSE) +
  geom_vline(xintercept = 3000, linetype = "longdash") +
  facet_grid(state ~ .id,
             scales = "free"
  ) +
  labs(x = "# of dives", y = "Dive duration (s)") +
  theme_jjo()
```

It seems much better, so let's remove any rows with `dduration` > 3000 sec.

```{r data-exploration-2018-10}
# filter data
data_2018_filter <- data_2018[dduration < 3000, ]

# nbrow removed
data_2018[dduration >= 3000, .(nb_row_removed = .N), by = .id] %>%
  sable(caption = "# of rows removed by 2018-individuals")
```

### Check day and night {.tabset}

#### Light levels

```{r data-exploration-2018-11, fig.cap="Visualization of light level at the surface along 2018-individuals' trip", fig.height=6}
# let's first average `lightatsurf` by individuals, day since departure and hour
dataPlot <- data_2018[, .(lightatsurf = median(lightatsurf)),
                      by = .(.id, day_departure, date = as.Date(date), hour)
]

# display the result
ggplot(dataPlot, aes(x = day_departure, y = hour, fill = lightatsurf)) +
  geom_tile() +
  facet_grid(.id ~ .) +
  theme_jjo() +
  labs(x = "# of days since departure", 
       y = "Hour", 
       fill = "Light level at the surface")+
  theme(legend.position = c("bottom"))
```

#### Day and night detection

```{r data-exploration-2018-12, fig.cap="Visualization of detected night time and day time along 2018-individuals' trip", fig.height=6}
# let's first average `lightatsurf` by individuals, day since departure and hour
dataPlot <- data_2018[, .(lightatsurf = median(lightatsurf)),
                      by = .(.id, 
                             day_departure, 
                             date = as.Date(date), 
                             hour, 
                             phase)
]

# display the result
ggplot(dataPlot, aes(x = day_departure, y = hour, fill = phase)) +
  geom_tile() +
  facet_grid(.id ~ .) +
  theme_jjo() +
  labs(x = "# of days since departure", 
       y = "Hour", 
       fill = "Day time and night time as detected by the `cal_phase_day` function") +
  theme(legend.position = c("bottom"))
```

### All Variables 

```{r data-exploration-2018-13}
names_display <- names(data_2018_filter[, -c(
  ".id",
  "date",
  "divenumber",
  "year",
  "month",
  "day",
  "hour",
  "min",
  "sec",
  "juldate",
  "divetype",
  "euphoticdepth",
  "thermoclinedepth",
  "day_departure",
  "phase",
  "lat",
  "lon",
  "dist_dep"
)])
```

```{r data-exploration-2018-14, eval=FALSE, include=TRUE}
for (i in names_display) {
  cat("#####", i, "{.unlisted .unnumbered} \n")
  if (i == "maxdepth") {
    print(
      ggplot() +
        geom_point(
          data = data_2018_filter[, .(
            .id,
            date,
            thermoclinedepth
          )],
          aes(
            x = as.Date(date),
            y = -thermoclinedepth,
            colour = "Thermocline (m)"
          ),
          alpha = .2,
          size = .5
        ) +
        geom_point(
          data = data_2018_filter[, .(
            .id,
            date,
            euphoticdepth
          )],
          aes(
            x = as.Date(date),
            y = -euphoticdepth,
            colour = "Euphotic (m)"
          ),
          alpha = .2,
          size = .5
        ) +
        scale_colour_manual(
          values = c(
            "Thermocline (m)" = "red",
            "Euphotic (m)" = "black"
          ),
          name = "Zone"
        ) +
        new_scale_color() +
        geom_point(
          data = melt(data_2018_filter[, .(.id, date, get(i))], 
                      id.vars = c(".id", "date")),
          aes(
            x = as.Date(date),
            y = -value,
            col = .id
          ),
          alpha = 1 / 10,
          size = .5,
          show.legend = FALSE
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        scale_x_date(date_labels = "%m/%Y") +
        labs(x = "Date", y = "Maximum Depth (m)") +
        theme_jjo() +
        theme(
          axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "bottom"
        )
    )
    cat("<blockquote> Considering `ind_2018074` has slightly different values than other individuals for the thermocline depth, it would be interesting to see where the animal went. </blockquote>")
  } else if (i == "driftrate") {
    print(
      ggplot(
        data = melt(data_2018_filter[, .(.id, date, get(i), divetype)], 
                    id.vars = c(".id", "date", "divetype")),
        aes(
          x = as.Date(date),
          y = value,
          col = divetype
        )
      ) +
        geom_point(
          alpha = 1 / 10,
          size = .5
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        scale_x_date(date_labels = "%m/%Y") +
        labs(x = "Date", y = "Drift Rate 'm/s", col = "Dive Type") +
        theme_jjo() +
        theme(
          axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "bottom"
        ) +
        guides(colour = guide_legend(override.aes = list(
          size = 7,
          alpha = 1
        )))
    )
  } else {
    print(
      ggplot(
        data = melt(data_2018_filter[, .(.id, date, get(i))], 
                    id.vars = c(".id", "date")),
        aes(
          x = as.Date(date),
          y = value,
          col = .id
        )
      ) +
        geom_point(
          show.legend = FALSE,
          alpha = 1 / 10,
          size = .5
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        scale_x_date(date_labels = "%m/%Y") +
        labs(x = "Date", y = i) +
        theme_jjo() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    )
  }
  
  cat("\n \n")
}
```

### {.unlisted .unnumbered .tabset .tabset-fade .tabset-pills}

```{r data-exploration-2018-15, results='asis', cache=TRUE, echo=FALSE}
for (i in names_display) {
  cat("####", i, "{.unlisted .unnumbered} \n")
  if (i == "maxdepth") {
    print(
      ggplot() +
        geom_point(
          data = data_2018_filter[, .(
            .id,
            date,
            thermoclinedepth
          )],
          aes(
            x = as.Date(date),
            y = -thermoclinedepth,
            colour = "Thermocline (m)"
          ),
          alpha = .2,
          size = .5
        ) +
        geom_point(
          data = data_2018_filter[, .(
            .id,
            date,
            euphoticdepth
          )],
          aes(
            x = as.Date(date),
            y = -euphoticdepth,
            colour = "Euphotic (m)"
          ),
          alpha = .2,
          size = .5
        ) +
        scale_colour_manual(
          values = c(
            "Thermocline (m)" = "red",
            "Euphotic (m)" = "black"
          ),
          name = "Zone"
        ) +
        new_scale_color() +
        geom_point(
          data = melt(data_2018_filter[, .(.id, date, get(i))], 
                      id.vars = c(".id", "date")),
          aes(
            x = as.Date(date),
            y = -value,
            col = .id
          ),
          alpha = 1 / 10,
          size = .5,
          show.legend = FALSE
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        scale_x_date(date_labels = "%m/%Y") +
        labs(x = "Date", y = "Maximum Depth (m)") +
        theme_jjo() +
        theme(
          axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "bottom"
        )
    )
    cat("<blockquote> Considering `ind_2018074` has slightly different values than other individuals for the thermocline depth, it would be interesting to see where the animal went. </blockquote>")
  } else if (i == "driftrate") {
    print(
      ggplot(
        data = melt(data_2018_filter[, .(.id, date, get(i), divetype)], 
                    id.vars = c(".id", "date", "divetype")),
        aes(
          x = as.Date(date),
          y = value,
          col = divetype
        )
      ) +
        geom_point(
          alpha = 1 / 10,
          size = .5
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        scale_x_date(date_labels = "%m/%Y") +
        labs(x = "Date", y = "Drift Rate 'm/s", col = "Dive Type") +
        theme_jjo() +
        theme(
          axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "bottom"
        ) +
        guides(colour = guide_legend(override.aes = list(
          size = 7,
          alpha = 1
        )))
    )
  } else {
    print(
      ggplot(
        data = melt(data_2018_filter[, .(.id, date, get(i))], 
                    id.vars = c(".id", "date")),
        aes(
          x = as.Date(date),
          y = value,
          col = .id
        )
      ) +
        geom_point(
          show.legend = FALSE,
          alpha = 1 / 10,
          size = .5
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        scale_x_date(date_labels = "%m/%Y") +
        labs(x = "Date", y = i) +
        theme_jjo() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    )
  }
  
  cat("\n \n")
}
```

### {.unlisted .unnumbered}

> Few questions, that I should look into it:
  >
  > * is the bimodal distribution of `dduration`, `desctime` due to nycthemeral migration?
  > * is the bimodal distribution of `descrate` (especially for `ind2018070` and `ind_2018072`) due to drift dive?
  > * is `lightatbott` could be used to identify bioluminescence, cause it seems there is a lot going on at the bottom?
  > * are the variations observed for `lightatsurf` is due to moon cycle?
  > * not sure why is there a bimodal distribution of `tempatbott`!
  > * `drifrate` that one is awesome! Thanks to `divetype` we can clearly see a pattern of how driftrate (and so buoyancy) change according time.
> * the bimodal distribution of `verticalspeed90` and `verticalspeed95` should be due to drift dive.

```{r data-exploration-2018-16, eval=FALSE, include=TRUE}
# same plot with a colored for the phase of the day
for (i in names_display) {
  cat("####", i, "{-} \n")
  print(
    ggplot(
      data = melt(data_2018_filter[, .(.id, date, get(i), phase)],
                  id.vars = c(
                    ".id",
                    "date",
                    "phase"
                  )
      ),
      aes(
        x = as.Date(date),
        y = value,
        col = phase
      )
    ) +
      geom_point(
        alpha = 1 / 10,
        size = .5
      ) +
      facet_wrap(. ~ .id, scales = "free") +
      scale_x_date(date_labels = "%m/%Y") +
      labs(x = "Date", y = i) +
      theme_jjo() +
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom"
      ) +
      guides(colour = guide_legend(override.aes = list(
        size = 7,
        alpha = 1
      )))
  )
  cat("\n \n")
}
```

### {.unlisted .unnumbered .tabset .tabset-fade .tabset-pills}

```{r data-exploration-2018-17, results='asis', cache=TRUE, echo=FALSE}
# same plot with a colored for the phase of the day
for (i in names_display) {
  cat("####", i, "{-} \n")
  print(
    ggplot(
      data = melt(data_2018_filter[, .(.id, date, get(i), phase)],
                  id.vars = c(
                    ".id",
                    "date",
                    "phase"
                  )
      ),
      aes(
        x = as.Date(date),
        y = value,
        col = phase
      )
    ) +
      geom_point(
        alpha = 1 / 10,
        size = .5
      ) +
      facet_wrap(. ~ .id, scales = "free") +
      scale_x_date(date_labels = "%m/%Y") +
      labs(x = "Date", y = i) +
      theme_jjo() +
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom"
      ) +
      guides(colour = guide_legend(override.aes = list(
        size = 7,
        alpha = 1
      )))
  )
  cat("\n \n")
}
```

## All Variables during the first month

```{r data-exploration-2018-18, eval=FALSE, include=TRUE}
for (i in names_display) {
  cat("####", i, "{.unlisted .unnumbered} \n")
  if (i == "maxdepth") {
    print(
      ggplot() +
        geom_point(
          data = data_2018_filter[day_departure < 32, .(
            .id,
            day_departure,
            thermoclinedepth
          )],
          aes(
            x = day_departure,
            y = -thermoclinedepth,
            colour = "Thermocline (m)",
            group = day_departure
          ),
          alpha = .2,
          size = .5
        ) +
        geom_point(
          data = data_2018_filter[day_departure < 32, .(
            .id,
            day_departure,
            euphoticdepth
          )],
          aes(
            x = day_departure,
            y = -euphoticdepth,
            colour = "Euphotic (m)",
            group = day_departure
          ),
          alpha = .2,
          size = .5
        ) +
        scale_colour_manual(
          values = c(
            "Thermocline (m)" = "red",
            "Euphotic (m)" = "black"
          ),
          name = "Zone"
        ) +
        new_scale_color() +
        geom_boxplot(
          data = melt(data_2018_filter[day_departure < 32, 
                                       .(.id, day_departure, get(i))], 
                      id.vars = c(".id", "day_departure")),
          aes(
            x = day_departure,
            y = -value,
            col = .id,
            group = day_departure
          ),
          alpha = 1 / 10,
          size = .5,
          show.legend = FALSE
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        labs(x = "# days since departure", y = "Maximum Depth (m)") +
        theme_jjo() +
        theme(legend.position = "bottom")
    )
  } else {
    print(
      ggplot(
        data = melt(data_2018_filter[day_departure < 32, 
                                     .(.id, day_departure, get(i))], 
                    id.vars = c(".id", "day_departure")),
        aes(
          x = day_departure,
          y = value,
          color = .id,
          group = day_departure
        )
      ) +
        geom_boxplot(
          show.legend = FALSE,
          alpha = 1 / 10,
          size = .5
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        labs(x = "# days since departure", y = i) +
        theme_jjo()
    )
  }
  cat("\n \n")
}
```

### {.unlisted .unnumbered .tabset .tabset-fade .tabset-pills}

```{r data-exploration-2018-19, results='asis', cache=TRUE, echo=FALSE}
for (i in names_display) {
  cat("####", i, "{.unlisted .unnumbered} \n")
  if (i == "maxdepth") {
    print(
      ggplot() +
        geom_point(
          data = data_2018_filter[day_departure < 32, .(
            .id,
            day_departure,
            thermoclinedepth
          )],
          aes(
            x = day_departure,
            y = -thermoclinedepth,
            colour = "Thermocline (m)",
            group = day_departure
          ),
          alpha = .2,
          size = .5
        ) +
        geom_point(
          data = data_2018_filter[day_departure < 32, .(
            .id,
            day_departure,
            euphoticdepth
          )],
          aes(
            x = day_departure,
            y = -euphoticdepth,
            colour = "Euphotic (m)",
            group = day_departure
          ),
          alpha = .2,
          size = .5
        ) +
        scale_colour_manual(
          values = c(
            "Thermocline (m)" = "red",
            "Euphotic (m)" = "black"
          ),
          name = "Zone"
        ) +
        new_scale_color() +
        geom_boxplot(
          data = melt(data_2018_filter[day_departure < 32, 
                                       .(.id, day_departure, get(i))], 
                      id.vars = c(".id", "day_departure")),
          aes(
            x = day_departure,
            y = -value,
            col = .id,
            group = day_departure
          ),
          alpha = 1 / 10,
          size = .5,
          show.legend = FALSE
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        labs(x = "# days since departure", y = "Maximum Depth (m)") +
        theme_jjo() +
        theme(legend.position = "bottom")
    )
  } else {
    print(
      ggplot(
        data = melt(data_2018_filter[day_departure < 32, 
                                     .(.id, day_departure, get(i))], 
                    id.vars = c(".id", "day_departure")),
        aes(
          x = day_departure,
          y = value,
          color = .id,
          group = day_departure
        )
      ) +
        geom_boxplot(
          show.legend = FALSE,
          alpha = 1 / 10,
          size = .5
        ) +
        facet_wrap(. ~ .id, scales = "free") +
        labs(x = "# days since departure", y = i) +
        theme_jjo()
    )
  }
  cat("\n \n")
}
```

### {.unlisted .unnumbered}

```{r data-exploration-2018-20, eval=FALSE, include=TRUE}
for (i in names_display) {
  cat("####", i, "{.unlisted .unnumbered} \n")
  print(
    ggplot(
      data = melt(data_2018_filter[
        day_departure < 32,
        .(.id, day_departure, get(i), phase)
      ],
      id.vars = c(".id", "day_departure", "phase")
      ),
      aes(
        x = day_departure,
        y = value,
        color = phase,
        group = interaction(day_departure, phase),
      )
    ) +
      geom_boxplot(
        alpha = 1 / 10,
        size = .5
      ) +
      facet_wrap(. ~ .id, scales = "free") +
      labs(x = "# days since departure", y = i) +
      theme_jjo() +
      theme(legend.position = "bottom")
  )
  cat("\n \n")
}
```

### {.unlisted .unnumbered .tabset .tabset-fade .tabset-pills}

```{r data-exploration-2018-21, results='asis', cache=TRUE, echo=FALSE}
for (i in names_display) {
  cat("####", i, "{.unlisted .unnumbered} \n")
  print(
    ggplot(
      data = melt(data_2018_filter[
        day_departure < 32,
        .(.id, day_departure, get(i), phase)
      ],
      id.vars = c(".id", "day_departure", "phase")
      ),
      aes(
        x = day_departure,
        y = value,
        color = phase,
        group = interaction(day_departure, phase),
      )
    ) +
      geom_boxplot(
        alpha = 1 / 10,
        size = .5
      ) +
      facet_wrap(. ~ .id, scales = "free") +
      labs(x = "# days since departure", y = i) +
      theme_jjo() +
      theme(legend.position = "bottom")
  )
  cat("\n \n")
}
```

### Correlation

Can we find nice correlation?
  
```{r data-exploration-2018-22, fig.cap="Correlation matrix (crosses indicate non significant correlation)", fig.width=10, fig.height=10}
# compute correlation
corr_2018 <- round(cor(data_2018_filter[, names_display, with = F],
                       use = "pairwise.complete.obs"
), 1)

# replace NA value by 0
corr_2018[is.na(corr_2018)] <- 0

# compute p_values
corr_p_2018 <- cor_pmat(data_2018_filter[, names_display, with = F])

# replace NA value by 0
corr_p_2018[is.na(corr_p_2018)] <- 1

# display
ggcorrplot(
  corr_2018,
  p.mat = corr_p_2018,
  hc.order = TRUE,
  method = "circle",
  type = "lower",
  ggtheme = theme_jjo(),
  sig.level = 0.05,
  colors =  c("#00AFBB", "#E7B800", "#FC4E07")
)
```

Another way to see it:
  
```{r data-exploration-2018-23}
# flatten correlation matrix
cor_result_2018 <- flat_cor_mat(corr_2018, corr_p_2018)

# keep only the one above .7
cor_result_2018[cor >= .7, ][order(-abs(cor))] %>%
  sable(caption = "Pairwise correlation above 0.75 and associated p-values")
```

> I guess nothing unexpected here, I'll have to check with Patrick about the efficiency ;)

## Dive Type

```{r data-exploration-2018-24, fig.cap="Proportion dive types"}
# dataset to plot proportional area plot
data_2018_filter[, sum_id := .N, by = .(.id, day_departure)] %>%
  .[, sum_id_days := .N, by = .(.id, day_departure, divetype)] %>%
  .[, prop := sum_id_days / sum_id]
dataPlot <- unique(data_2018_filter[, .(prop, .id, divetype, day_departure)])

# area plot
ggplot(dataPlot, aes(
  x = as.numeric(day_departure),
  y = prop,
  fill = as.character(divetype)
)) +
  geom_area(alpha = 0.6, size = 1) +
  facet_wrap(.id ~ ., scales = "free") +
  theme_jjo() +
  theme(legend.position = "bottom") +
  labs(x = "# of days since departure", 
       y = "Proportion of dives", 
       fill = "Dive types")
```

## Dive duration *vs.* Maximum depth {.tabset}

### Colored by ID

```{r data-exploration-2018-25, fig.cap="Dive duration vs. Maximum Depth colored 2018-individuals"}
# plot
ggplot(data = data_2018_filter, aes(y = dduration, x = maxdepth, col = .id)) +
  geom_point(size = .5, alpha = .1, show.legend = FALSE) +
  facet_wrap(.id ~ .) +
  labs(x = "Maximum depth (m)", y = "Dive duration (s)") +
  theme_jjo()
```

### Colored by Dive Type

```{r data-exploration-2018-26, fig.cap="Dive duration vs. Maximum Depth colored by Dive Type"}
# plot
ggplot(data = data_2018_filter, aes(y = dduration, 
                                    x = maxdepth, 
                                    col = divetype)) +
  geom_point(size = .5, alpha = .1) +
  facet_wrap(.id ~ .) +
  guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
  labs(x = "Maximum depth (m)", y = "Dive duration (s)") +
  theme_jjo() +
  theme(legend.position = "bottom")
```

### Colored by # days since departure

```{r data-exploration-2018-27, fig.cap="Dive duration vs. Maximum Depth colored by # days since departure"}
# plot
ggplot(data = data_2018_filter[, prop_track := (day_departure * 100) / max(day_departure), by = .id], 
       aes(y = dduration, x = maxdepth, col = prop_track)) +
  geom_point(size = .5, alpha = .1) +
  facet_wrap(.id ~ .) +
  labs(x = "Maximum depth (m)", 
       y = "Dive duration (s)", 
       col = "Proportion of completed track (%)") +
  scale_color_continuous(type = "viridis") +
  theme_jjo() +
  theme(legend.position = "bottom")
```

### Colored by phases of day

```{r data-exploration-2018-28, fig.cap="Dive duration vs. Maximum Depth colored by phases of the day"}
# plot
ggplot(data = data_2018_filter, aes(y = dduration, x = maxdepth, col = phase)) +
  geom_point(size = .5, alpha = .1) +
  facet_wrap(.id ~ .) +
  guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
  labs(x = "Maximum depth (m)", 
       y = "Dive duration (s)", 
       col = "Phases of the day") +
  theme_jjo() +
  theme(legend.position = "bottom")
```

> There seems to be a *patch* for high depths (especially visible for `ind2018070`), but I don't know what it could be linked to...

## Drift Rate

> In the following graphs:
>
> * `driftrate` is calculated using only `divetype == "2: drift"`
> * whereas all the others variables are calculated all dives considered

```{r data-exploration-2018-29}
# build dataset
dataPlot <- data_2018_filter[divetype == "2: drift",
                             # median drift rate for drift dive
                             .(driftrate = median(driftrate, na.rm = T)),
                             by = .(.id, day_departure)
][data_2018_filter[,
                   .(
                     # median dive duration all dives considered
                     dduration = median(dduration, na.rm = T),
                     # median max depth all dives considered
                     maxdepth = median(maxdepth, na.rm = T),
                     # median bottom dives all dives considered
                     botttime = median(botttime, na.rm = T)
                   ),
                   by = .(.id, day_departure)
],
on = c(".id", "day_departure")
]
```

```{r data-exploration-2018-30, fig.cap="Drift rate vs. Bottom time"}
# plot
ggplot(dataPlot, aes(x = botttime, y = driftrate, col = .id)) +
  geom_point(size = .5, alpha = .5) +
  geom_smooth(method = "lm") +
  guides(color = "none") +
  facet_wrap(.id ~ .) +
  scale_x_continuous(limits = c(0, 700)) +
  labs(x = "Daily median Bottom time (s)", 
       y = "Daily median drift rate (m.s-1)") +
  theme_jjo()
```

```{r data-exploration-2018-31, fig.cap="Drift rate vs. Maximum depth"}
# plot
ggplot(dataPlot, aes(x = maxdepth, y = driftrate, col = .id)) +
  geom_point(size = .5, alpha = .5) +
  geom_smooth(method = "lm") +
  guides(color = "none") +
  facet_wrap(.id ~ .) +
  labs(x = "Daily median Maximum depth (m)", 
       y = "Daily median drift rate (m.s-1)") +
  theme_jjo()
```

```{r data-exploration-2018-32, fig.cap="Drift rate vs. Dive duration"}
# plot
ggplot(dataPlot, aes(x = dduration, y = driftrate, col = .id)) +
  geom_point(size = .5, alpha = .5) +
  geom_smooth(method = "lm") +
  guides(color = "none") +
  facet_wrap(.id ~ .) +
  labs(x = "Daily median Dive duration (s)", 
       y = "Daily median drift rate (m.s-1)") +
  theme_jjo()
```

## Behavioral Aerobic Dive Limit (bADL)

### [Cook et al (2008)](https://www.sciencedirect.com/science/article/pii/S000334720800136X?via%3Dihub)

```{r data-exploration-2018-33, fig.cap="Post-dive duration vs. dive duration"}
# dive duration vs pdi by days
ggplot(data = data_2018_filter[pdi < 300, ], aes(
  x = dduration,
  y = pdi,
  color = .id,
  group = dduration,
  fill = "none"
)) +
  geom_boxplot(show.legend = FALSE, outlier.alpha = 0.05, alpha = 0) +
  labs(x = "Dive duration (s)", y = "Post-dive duration (s)") +
  facet_wrap(. ~ .id, scales = "free_x") +
  theme_jjo()
```

```{r data-exploration-2018-34, fig.cap="Post-dive duration vs. dive duration (raw data)"}
# dive duration vs pdi by days
ggplot(data = data_2018_filter[pdi < 300,], aes(x = dduration, 
                                                y = pdi, 
                                                color = .id)) +
  geom_point(show.legend = FALSE, alpha = 0.05) +
  geom_smooth(
    method = "gam",
    show.legend = FALSE,
    col = "black",
    linetype = "dashed"
  ) +
  labs(x = "Dive duration (s)", y = "Post-dive duration (s)") +
  facet_wrap(. ~ .id, scales = "free_x") +
  theme_jjo()
```
```{r data-exploration-2018-35, fig.cap="Post-dive duration / dive duration ratio vs. day since departure"}
# dive duration vs pdi by days
ggplot(
  data = data_2018_filter[pdi < 300, .(.id, pdi_ratio = pdi / dduration, day_departure)],
  aes(
    x = day_departure,
    y = pdi_ratio,
    color = .id,
    group = day_departure,
    fill = "none"
  )
) +
  geom_boxplot(show.legend = FALSE,
               outlier.alpha = 0.05,
               alpha = 0) +
  labs(x = "# days since departure", y = "Post-dive / Dive duration ratio") +
  facet_wrap(. ~ .id, scales = "free_x") +
  # zoom
  coord_cartesian(ylim = c(0, 0.4)) +
  theme_jjo()
```

### [Shero et *al.* (2018)](https://www.researchgate.net/publication/222683042_To_breathe_or_not_to_breathe_Optimal_breathing_aerobic_dive_limit_and_oxygen_stores_in_deep-diving_blue-eyed_shags)

Based on [Shero et *al.* (2018)](https://www.researchgate.net/publication/222683042_To_breathe_or_not_to_breathe_Optimal_breathing_aerobic_dive_limit_and_oxygen_stores_in_deep-diving_blue-eyed_shags), we decided to look at the *bADL* as the 95th percentile of dive duration each day, for those with $n \geq 50$. This threshold was chosen following this figure:

```{r data-exploration-2018-36, fig.cap="Distribution of the number of dives each day. The threshold used to calculate bADL is fixed at 50 dives per day.", fig.height=3}
ggplot(data_2018_filter[,.(nb_dives = .N), 
                        by = .(.id, day_departure)], 
       aes(x=nb_dives, fill=.id)) +
  geom_histogram(show.legend = FALSE) + 
  facet_grid(.~.id) +
  labs(y="# of days", x = "# of dives per day") +
  theme_jjo()
```

```{r data-exploration-2018-37, fig.cap="Behavioral ADL vs. drift rate along animals' trip (Am I the only one seeing some kind of relationship?)"}
# select day that have at least 50 dives
days_to_keep = data_2018_filter[,
                                .(nb_dives = .N),
                                by = .(.id, day_departure)] %>%
  .[nb_dives >= 50,]

# keep only those days
data_2018_filter_complete_day = merge(data_2018_filter,
                                      days_to_keep,
                                      by = c(".id", "day_departure"))

# data plot
dataPlot = data_2018_filter_complete_day[,
                                         .(badl = quantile(dduration, 0.95)),
                                         by = .(.id, day_departure)]

# combine two datasets to be able to use a second axis
# https://stackoverflow.com/questions/49185583/two-y-axes-with-different-scales-for-two-datasets-in-ggplot2
dataMegaPlot = rbind(data_2018_filter_complete_day[divetype == "2: drift"] %>%
                       .[, .(w = .id,
                             y = driftrate,
                             x = day_departure,
                             z = "second_plot")],
                     dataPlot[, .(
                       w = .id,
                       # tricky one
                       y = (badl / 1000) - 1,
                       x = day_departure,
                       z = "first_plot"
                     )])

# plot
ggplot() +
  geom_point(
    data = dataMegaPlot[z == "second_plot", ],
    aes(x = x, y = y),
    alpha = 1 / 10,
    size = 0.5,
    color = "grey40",
    show.legend = FALSE
  ) +
  geom_path(data = dataMegaPlot[z == "first_plot", ],
            aes(x = x, y = y, color = w),
            show.legend = FALSE) +
  scale_y_continuous(
    # Features of the first axis
    name = "Drift rate (m/s)",
    # Add a second axis and specify its features
    sec.axis = sec_axis( ~ (. * 1000) + 1000, 
                         name = "Behavioral Aerobic Dive Limit (s)")
  ) +
  labs(x = "# days since departure") +
  facet_wrap(w ~ .) +
  theme_jjo()
```

> Looking at this graph, I want to believe that there is some kind of relationship between the *bADL* as defined by [Shero et *al.* (2018)](https://www.researchgate.net/publication/222683042_To_breathe_or_not_to_breathe_Optimal_breathing_aerobic_dive_limit_and_oxygen_stores_in_deep-diving_blue-eyed_shags) and the drift rate (and so buyoancy).

```{r data-exploration-2018-38}
# get badl
dataplot_1 = data_2018_filter_complete_day[,
                              .(badl = quantile(dduration, 0.95)),
                              by = .(.id, day_departure)]
# get driftrate
dataplot_2 = data_2018_filter_complete_day[divetype == "2: drift",
                              .(driftrate = median(driftrate)),
                              by = .(.id, day_departure)]

# merge
dataPlot = merge(dataplot_1,
                 dataplot_2,
                 by = c(".id", "day_departure"),
                 all = TRUE)

# plot
ggplot(data = dataPlot, aes(x = badl, y = driftrate, col = .id)) +
  geom_point(show.legend = FALSE) +
  facet_wrap(.id~., scales = "free") +
  theme_jjo()
```

### {.unlisted .unnumbered .tabset .tabset-fade .tabset-pills}

#### ind_2018070 {.unlisted .unnumbered}

```{r data-exploration-2018-39}
# ind_2018070
plot_ly(
  x = dataPlot[.id == "ind_2018070", badl],
  y = dataPlot[.id == "ind_2018070", day_departure],
  z = dataPlot[.id == "ind_2018070", driftrate],
  type = "scatter3d",
  mode = "markers",
  marker = list(size = 2),
  color = dataPlot[.id == "ind_2018070", day_departure]
) %>% 
  layout(scene = list(xaxis = list(title = 'Behavioral ADL'),
                      yaxis = list(title = '# days since departure'),
                      zaxis = list(title = 'Drift rate (m/s)')))
```

#### ind_2018072 {.unlisted .unnumbered}

```{r data-exploration-2018-40}
# ind_2018072
plot_ly(
  x = dataPlot[.id == "ind_2018072", badl],
  y = dataPlot[.id == "ind_2018072", day_departure],
  z = dataPlot[.id == "ind_2018072", driftrate],
  type = "scatter3d",
  mode = "markers",
  marker = list(size = 2),
  color = dataPlot[.id == "ind_2018072", day_departure]
) %>% 
  layout(scene = list(xaxis = list(title = 'Behavioral ADL'),
                      yaxis = list(title = '# days since departure'),
                      zaxis = list(title = 'Drift rate (m/s)')))
```

#### ind_2018074 {.unlisted .unnumbered}

```{r data-exploration-2018-41}
# ind_2018074
plot_ly(
  x = dataPlot[.id == "ind_2018074", badl],
  y = dataPlot[.id == "ind_2018074", day_departure],
  z = dataPlot[.id == "ind_2018074", driftrate],
  type = "scatter3d",
  mode = "markers",
  marker = list(size = 2),
  color = dataPlot[.id == "ind_2018074", day_departure]
) %>% 
  layout(scene = list(xaxis = list(title = 'Behavioral ADL'),
                      yaxis = list(title = '# days since departure'),
                      zaxis = list(title = 'Drift rate (m/s)')))
```

#### ind_2018072 {.unlisted .unnumbered}

```{r data-exploration-2018-42}
# ind_2018080
plot_ly(
  x = dataPlot[.id == "ind_2018080", badl],
  y = dataPlot[.id == "ind_2018080", day_departure],
  z = dataPlot[.id == "ind_2018080", driftrate],
  type = "scatter3d",
  mode = "markers",
  marker = list(size = 2),
  color = dataPlot[.id == "ind_2018080", day_departure]
) %>% 
  layout(scene = list(xaxis = list(title = 'Behavioral ADL'),
                      yaxis = list(title = '# days since departure'),
                      zaxis = list(title = 'Drift rate (m/s)')))
```

## GPS data

```{r data-exploration-2018-43, fig.cap="Map with polylines", fig.width=8, eval=FALSE}
# This piece of code is only there to show how to draw a polylines with a 
# gradient color using leaflet.We're not using it due to the size of the 
# created map, and will continue using circle marker

# datasets used to display map
df_driftrate = unique(data_2018_filter[.id == "ind_2018070" & 
                                         divetype == "2: drift", 
                                       .(.id, lat, lon, dduration)])

# color palette
pal <- colorNumeric(
  palette = "YlGnBu",
  domain = df_driftrate$dduration
)

# add 
df_driftrate[, `:=`(nextLat = shift(lat),
                    nextLon = shift(lon),
                    color = pal(df_driftrate$dduration))]

# interactive map
gradient_map <- leaflet() %>%
  setView(lng = -122, lat = 38, zoom = 2) %>%
  addTiles() 

# add lines
for (i in 1:nrow(df_driftrate)) {
  gradient_map <- addPolylines(
    map = gradient_map,
    data = df_driftrate,
    lat = as.numeric(df_driftrate[i, c('lat', 'nextLat')]),
    lng = as.numeric(df_driftrate[i, c('lon', 'nextLon')]),
    color = df_driftrate[i, color],
    weight = 3,
    group = "individual_1"
  )
}

# add layer control
gradient_map <- addLayersControl(
  map = gradient_map,
  overlayGroups = c("individual_1"),
  options = layersControlOptions(collapsed = FALSE)
)

# format(object.size(gradient_map), units = "Mb")
```

Because for some data the contrast in changes was not enough marked, the only treatment applied on these data is to remove outliers for each variable using the interquartile range rule. 

```{r data-exploration-2018-44}
# interactive map
gradient_map <- leaflet() %>%
  setView(lng = -132, lat = 48, zoom = 4) %>%
  addTiles()

# loop by individuals and variable
grps = NULL
for (i in seq(data_2018_filter[!is.na(lat),unique(.id)])){
  for (k in c("dduration","maxdepth","efficiency", "driftrate")){
    if (k == "driftrate"){
      # set dataset used to plot
      dataPlot = unique(data_2018_filter %>% 
                          .[order(date),] %>% 
                          .[.id==data_2018_filter[!is.na(lat), unique(.id)][i] &
                              divetype == "2: drift" &
                              !is.na(get(k)), 
                            c("lat", "lon", k), with=FALSE] %>% 
                          .[!is_outlier(get(k)),])
      # color palette creation
      colPal <- colorNumeric(
        palette = "BrBG",
        domain = seq(-dataPlot[,max(abs(driftrate))],
                     dataPlot[,max(abs(driftrate))],
                     0.1)
      )
    } else {
      # set dataset used to plot
      dataPlot = unique(data_2018_filter %>% 
                          .[order(date),] %>% 
                          .[.id==data_2018_filter[!is.na(lat), unique(.id)][i] & 
                              divetype != "2: drift" &
                              !is.na(get(k)), 
                            c("lat", "lon", k), with=FALSE] %>% 
                          .[!is_outlier(get(k)),]) 
      # color palette creation
      colPal <- colorNumeric(
        palette = "YlGnBu",
        domain = dataPlot[,get(k)]
      )
    }
    
    # add color to dataset
    dataPlot[, color := colPal(dataPlot[,get(k)])]
    # add size column
    dataPlot[, radius := 3]
    # mark the beginning of the trip
    dataPlot[1, `:=` (color = "green",
                      radius = 4)]
    # mark the end of the trip
    dataPlot[.N, `:=` (color = "red",
                       radius = 4)]
    # reorder to make the end and the beginning in front
    dataPlot = rbind(dataPlot[-1,],dataPlot[1,])
    # add markers to map
    gradient_map <- addCircleMarkers(
      map = gradient_map,
      data = dataPlot,
      lat = ~lat,
      lng = ~lon,
      radius = ~radius,
      stroke = FALSE,
      color = ~color,
      fillOpacity = 1,
      group = paste(data_2018_filter[,unique(.id)][i], "-", k)
    ) %>% 
      addLegend("bottomleft", 
                data = dataPlot,
                group = paste(data_2018_filter[,unique(.id)][i], "-", k),
                pal = colPal, 
                values = ~get(k),
                title = k,
                opacity = 1
      )
    # retrieve groups
    grps = c(grps, paste(data_2018_filter[,unique(.id)][i], "-", k))
  }
}

# add layer control
gradient_map <- addLayersControl(
  map = gradient_map,
  overlayGroups = grps,
  options = layersControlOptions(collapsed = TRUE)
) %>% hideGroup(grps)
```

```{r data-exploration-2018-45, fig.cap="Tracking data 2018 individuals (green and red dot respectively indicate the beginning and the end of each trip)", fig.width=8}
# display
gradient_map
```

